---
title: "1 - Database"
author: "Arturo Bertero"
date: ""
output: html_document
---

# Libraries

```{r}
# Packages: attach pacman first, then let p_load() attach everything else
# (pacman installs any package that is not yet available).
library("pacman")
p_load(tidyverse, here, sjlabelled, stringr, glue, janitor, haven, kableExtra, 
       ltm, skimr, readxl, naniar, mgm, conflicted, mice, MissMech, stargazer)

# Several attached packages export functions with the same name. With
# `conflicted` loaded, declare explicitly which implementation wins.
conflicts_prefer(haven::zap_labels)
conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::select)


# Print numbers in fixed notation: a large `scipen` penalty makes R avoid
# scientific notation.
options(scipen=999)
```

# Input

```{r}
# Source data (ResPOnsE COVID-19, waves 1-6, English release) can be
# downloaded from:
# https://dataverse.unimi.it/dataset.xhtml?persistentId=doi:10.13130/RD_UNIMI/IJDSVS

# One-off conversion from the original Stata file to .rds (kept for reference;
# the .rds copy is the one stored for GitHub):
# response_original <- read_dta(
#   here("Input", "v1.0_ResPOnsE_COVID_19_W1-W6_ENGLISH.dta")
# ) %>%
#   clean_names()
# saveRDS(response_original,
#         here("Input", "v1.0_ResPOnsE_COVID_19_W1-W6_ENGLISH.rds"))

# Read the .rds copy of the full panel.
response_original <-
  here("Input", "v1.0_ResPOnsE_COVID_19_W1-W6_ENGLISH.rds") %>%
  read_rds()
```

# Processing: W5; 20 Oct to 15 Dec 2022

## IPBS data

```{r}
# Build the wave-5 analysis table `IPBS` from `response_original`.
# The steps below are order-dependent:
#   1. keep wave 5 and the 13 variables of interest, under readable names;
#   2. drop respondents with a missing answer on any of the six issue items;
#   3. derive the *_cat columns from the ORIGINAL numeric codes;
#   4. recode 99 to NA in every numeric column;
#   5. reverse the scales of pol_int, abort, hh_income and globa;
#   6. reorder the columns (numeric L_R is dropped, only L_R_cat is kept).

# 1. Filter for wave, select and rename in one step (select() accepts
#    `new = old`), then set sex codes above 2 to NA.
IPBS <- response_original %>%
  filter(info_wave == 5) %>%
  dplyr::select(
    sex = s1,
    age = s2,
    educ = s9,
    hh_income = c1,
    pol_int = k1,
    vote = k6,
    L_R = k2,
    abort = pe7_02,
    eutha = pe7_03,
    marria = pe7_04,
    redis = t1,
    globa = t5,
    immig = t6
  ) %>%
  mutate(sex = replace(sex, sex > 2, NA)) # manage sex

# 2. Exclude rows with a missing value on any issue item (per the original
#    note, these are respondents who did not see the topic module).
IPBS <- IPBS %>%
  filter(if_all(c(abort, eutha, marria, redis, globa, immig), ~ !is.na(.)))

# 3. Recode into labelled categories. Unmatched codes become NA.
# NOTE(review): sex codes > 2 were already set to NA in step 1, so the former
#   `sex == 3 ~ "Female or other"` branch could never match and was removed.
#   If code 3 is meant to be pooled with code 2, drop the replace() in step 1
#   instead.
# NOTE(review): pol_int_cat and hh_income_cat are derived BEFORE the scale
#   reversal in step 5, so the category "Low" ends up paired with the reversed
#   numeric value 4 (and "High" with 1). Confirm against the codebook which
#   direction is intended.
IPBS <- IPBS %>%
  mutate(
    sex = case_when(
      sex == 1 ~ "Male",
      sex == 2 ~ "Female or other"
    ),
    educ_cat = case_when(
      educ <= 6 ~ "Less than university",
      educ > 6 ~ "Degree or more"
    ),
    pol_int_cat = case_when(
      pol_int == 1 ~ "Low",
      pol_int == 2 ~ "Medium-low",
      pol_int == 3 ~ "Medium-high",
      pol_int == 4 ~ "High"
    ),
    hh_income_cat = case_when(
      hh_income == 1 ~ "Low",
      hh_income == 2 ~ "Medium-low",
      hh_income == 3 ~ "Medium-high",
      hh_income == 4 ~ "High"
    ),
    vote_cat = case_when(
      vote == 2 | vote == 3 | vote == 5 | vote == 11 ~ "Dx",
      vote == 4 ~ "M5S",
      vote == 1 | vote == 6 | vote == 7 | vote == 10 ~ "Sin",
      vote == 8 | vote == 9 | vote == 12 ~ "Other",
      vote == 13 | vote == 14 | vote == 15 ~ "No Vote"
    ),
    # Codes 98/99 get their own category here, before 99 is turned into NA.
    L_R_cat = case_when(
      L_R == 0 | L_R == 1 | L_R == 2 | L_R == 3 ~ "Left",
      L_R == 4 | L_R == 5 | L_R == 6 ~ "Center",
      L_R == 7 | L_R == 8 | L_R == 9 | L_R == 10 ~ "Right",
      L_R == 98 | L_R == 99 ~ "No lr"
    )
  )

# 4. Assign 99 as NA in all numeric columns (no rows are dropped here).
# NOTE(review): this also applies to `age` (a respondent aged 99 would become
#   NA) and leaves code 98 untouched — confirm both are intended.
IPBS <- IPBS %>%
  mutate(across(where(is.numeric), ~ replace(., . == 99, NA)))

# Visualize missing
vis_miss(IPBS)

# 5. Recode polarity so that high values = endorsement: three items are
#    reversed as 5 - x, globa as 7 - x.
IPBS <- IPBS %>%
  mutate(
    pol_int = 5 - pol_int,
    abort = 5 - abort,
    hh_income = 5 - hh_income,
    globa = 7 - globa
  )

# 6. Reorder dataset
IPBS <- IPBS %>%
  dplyr::select(
    L_R_cat,
    abort, eutha, marria, # cultural
    redis, globa, immig, # economic
    vote, vote_cat, sex, age, educ, educ_cat,
    pol_int, pol_int_cat, hh_income, hh_income_cat # sociodem/predictors
  )


```

## MICE imputation

```{r}
# Impute the missing values of IPBS with mice and keep the first completed
# dataset as `IPBS_imputed`.

# Prepare the input: character columns become factors and haven value labels
# are stripped, so every column is a plain numeric or factor vector.
IPBS_mice <- IPBS %>%
  mutate(across(where(is.character), as.factor)) %>%
  zap_labels()

# Run MICE: 5 imputed datasets, 20 iterations. `seed = 123` already makes the
# run reproducible; the explicit set.seed() is kept from the original script.
set.seed(123)
IPBS_mice <- mice(IPBS_mice, m = 5, maxit = 20, seed = 123)

# Extract completed dataset (first imputation)
IPBS_imputed <- complete(IPBS_mice, 1)

# Check there are no NAs
vis_miss(IPBS_imputed)

# Safety net: drop any row that still has a missing value after imputation.
IPBS_imputed <- IPBS_imputed %>% na.omit()

# Check assumption: are NAs random? The test runs on the numeric columns of the
# non-imputed IPBS, so it must run before IPBS is reduced to complete cases.
mcar_data <- IPBS %>% select(where(is.numeric))
TestMCARNormality(mcar_data)

# Descriptive table. Create the output folder first so that writing the file
# does not fail on a fresh clone of the project.
dir.create(here("Output", "Supplement"), recursive = TRUE, showWarnings = FALSE)
stargazer(as.data.frame(IPBS_imputed), type = "text", nobs = TRUE,
          out = here("Output", "Supplement", "Table_1_1.doc"))
```


# Not imputed data

```{r}
# Complete-case version of IPBS: drop every row with at least one NA.
# This overwrites IPBS, so it has to run after the MICE chunk, which needs the
# rows with missing values.
IPBS <- IPBS %>%
  na.omit()

# Descriptive table. Create the output folder first so that writing the file
# does not fail on a fresh clone of the project.
dir.create(here("Output", "Supplement"), recursive = TRUE, showWarnings = FALSE)
stargazer(as.data.frame(IPBS), type = "text", nobs = TRUE,
          out = here("Output", "Supplement", "Table_1.doc"))
```

# Output

```{r}
# Persist both analysis datasets as .rds files in the project's Input folder.

# Complete-case data
saveRDS(object = IPBS, file = here("Input", "IPBS.rds"))

# First MICE-completed data
saveRDS(object = IPBS_imputed, file = here("Input", "IPBS_imputed.rds"))

```

